chunk options
CSS for scrollable output & Header colors
Turning scientific / Exponential numbers off
options(scipen = 999)library(tidyverse)
library(ggthemes)library(timetk)
library(lubridate)
theme_viny_bright <- function(){
library(ggthemes)
ggthemes::theme_fivethirtyeight() %+replace%
theme(
axis.title = element_text(),
axis.text = element_text(size = 13),
legend.text = element_text(size = 10),
panel.background = element_rect(fill = "white"),
plot.background = element_rect(fill = "white"),
strip.background = element_blank(),
legend.background = element_rect(fill = NA),
legend.key = element_rect(fill = NA),
plot.title = element_text(hjust = 0.5,
size = 19,
face = "bold"),
plot.subtitle = element_text(hjust = 0.5, colour = "maroon")
)
}
theme_set(theme_viny_bright())bike_sharing_daily <- read.csv("../timetk_1st_try/day.csv")
head(bike_sharing_daily)bike_sharing_daily %>% str()'data.frame': 731 obs. of 16 variables:
$ instant : int 1 2 3 4 5 6 7 8 9 10 ...
$ dteday : chr "2011-01-01" "2011-01-02" "2011-01-03" "2011-01-04" ...
$ season : int 1 1 1 1 1 1 1 1 1 1 ...
$ yr : int 0 0 0 0 0 0 0 0 0 0 ...
$ mnth : int 1 1 1 1 1 1 1 1 1 1 ...
$ holiday : int 0 0 0 0 0 0 0 0 0 0 ...
$ weekday : int 6 0 1 2 3 4 5 6 0 1 ...
$ workingday: int 0 0 1 1 1 1 1 0 0 1 ...
$ weathersit: int 2 2 1 1 1 1 2 2 1 1 ...
$ temp : num 0.344 0.363 0.196 0.2 0.227 ...
$ atemp : num 0.364 0.354 0.189 0.212 0.229 ...
$ hum : num 0.806 0.696 0.437 0.59 0.437 ...
$ windspeed : num 0.16 0.249 0.248 0.16 0.187 ...
$ casual : int 331 131 120 108 82 88 148 68 54 41 ...
$ registered: int 654 670 1229 1454 1518 1518 1362 891 768 1280 ...
$ cnt : int 985 801 1349 1562 1600 1606 1510 959 822 1321 ...
walmart_sales_weekly %>% str()tibble [1,001 x 17] (S3: tbl_df/tbl/data.frame)
$ id : Factor w/ 3331 levels "1_1","1_2","1_3",..: 1 1 1 1 1 1 1 1 1 1 ...
$ Store : num [1:1001] 1 1 1 1 1 1 1 1 1 1 ...
$ Dept : num [1:1001] 1 1 1 1 1 1 1 1 1 1 ...
$ Date : Date[1:1001], format: "2010-02-05" "2010-02-12" ...
$ Weekly_Sales: num [1:1001] 24925 46039 41596 19404 21828 ...
$ IsHoliday : logi [1:1001] FALSE TRUE FALSE FALSE FALSE FALSE ...
$ Type : chr [1:1001] "A" "A" "A" "A" ...
$ Size : num [1:1001] 151315 151315 151315 151315 151315 ...
$ Temperature : num [1:1001] 42.3 38.5 39.9 46.6 46.5 ...
$ Fuel_Price : num [1:1001] 2.57 2.55 2.51 2.56 2.62 ...
$ MarkDown1 : num [1:1001] NA NA NA NA NA NA NA NA NA NA ...
$ MarkDown2 : num [1:1001] NA NA NA NA NA NA NA NA NA NA ...
$ MarkDown3 : num [1:1001] NA NA NA NA NA NA NA NA NA NA ...
$ MarkDown4 : num [1:1001] NA NA NA NA NA NA NA NA NA NA ...
$ MarkDown5 : num [1:1001] NA NA NA NA NA NA NA NA NA NA ...
$ CPI : num [1:1001] 211 211 211 211 211 ...
$ Unemployment: num [1:1001] 8.11 8.11 8.11 8.11 8.11 ...
bike_sharing_daily %>%
plot_time_series(dteday, cnt)Error: Problem with `mutate()` input `.value_smooth`.
x No method for class character.
i Input `.value_smooth` is `auto_smooth(...)`.
Run `rlang::last_error()` to see where the error occurred.
bike_sharing_daily <- bike_sharing_daily %>%
mutate(dteday = as.Date(dteday))
bike_sharing_daily %>%
str()'data.frame': 731 obs. of 16 variables:
$ instant : int 1 2 3 4 5 6 7 8 9 10 ...
$ dteday : Date, format: "2011-01-01" "2011-01-02" ...
$ season : int 1 1 1 1 1 1 1 1 1 1 ...
$ yr : int 0 0 0 0 0 0 0 0 0 0 ...
$ mnth : int 1 1 1 1 1 1 1 1 1 1 ...
$ holiday : int 0 0 0 0 0 0 0 0 0 0 ...
$ weekday : int 6 0 1 2 3 4 5 6 0 1 ...
$ workingday: int 0 0 1 1 1 1 1 0 0 1 ...
$ weathersit: int 2 2 1 1 1 1 2 2 1 1 ...
$ temp : num 0.344 0.363 0.196 0.2 0.227 ...
$ atemp : num 0.364 0.354 0.189 0.212 0.229 ...
$ hum : num 0.806 0.696 0.437 0.59 0.437 ...
$ windspeed : num 0.16 0.249 0.248 0.16 0.187 ...
$ casual : int 331 131 120 108 82 88 148 68 54 41 ...
$ registered: int 654 670 1229 1454 1518 1518 1362 891 768 1280 ...
$ cnt : int 985 801 1349 1562 1600 1606 1510 959 822 1321 ...
bike_sharing_daily %>%
plot_time_series(dteday, cnt)
NAbike_sharing_daily %>%
plot_time_series(dteday,
cnt,
.color_var = lubridate::quarter(dteday, with_year = TRUE)
)bike_sharing_daily %>%
plot_time_series(dteday,
cnt,
.color_var = lubridate::month(dteday)#, with_year = TRUE)
)bike_sharing_daily %>%
plot_time_series(dteday,
cnt,
.color_var = lubridate::semester(dteday, with_year = TRUE)
)bike_sharing_daily %>%
plot_time_series(dteday,
log(cnt),
.color_var = quarter(dteday, with_year = TRUE)
)bike_sharing_daily %>%
plot_anomaly_diagnostics(dteday,
cnt)In video log(cnt) was used for anomaly detection
bike_sharing_daily %>%
plot_anomaly_diagnostics(dteday,
log(cnt)
)walmart_sales_weekly %>% summarise_all(n_distinct)walmart_sales_weekly %>%
group_by(id) %>%
plot_time_series(Date,
Weekly_Sales,
.facet_ncol = 2)walmart_sales_weekly %>%
group_by(id) %>%
plot_time_series(Date,
log(Weekly_Sales),
.facet_ncol = 2)walmart_sales_weekly %>%
group_by(id) %>%
plot_anomaly_diagnostics(Date,
Weekly_Sales,
.facet_ncol = 2)bike_sharing_daily %>%
plot_seasonal_diagnostics(dteday,
cnt)bike_sharing_daily %>%
plot_seasonal_diagnostics(dteday,
cnt)first 2 groups
walmart_sales_weekly %>%
group_by(id) %>%
filter(group_indices() %in% 1:2) %>%
plot_seasonal_diagnostics(Date,
Weekly_Sales)bike_sharing_daily %>%
plot_seasonal_diagnostics(dteday,
cnt,
.feature_set = "wday.lbl")bike_sharing_daily %>%
plot_seasonal_diagnostics(dteday,
cnt,
.feature_set = "wday.lbl",
.geom = c("violin")
)walmart_sales_weekly %>%
group_by(id) %>%
filter(group_indices() %in% 1:2) %>%
plot_seasonal_diagnostics(Date,
Weekly_Sales,
.feature_set = "wday.lbl")walmart_sales_weekly %>%
group_by(id) %>%
filter(group_indices() %in% 1) %>%
plot_seasonal_diagnostics(Date,
Weekly_Sales,
.feature_set = "week")walmart_sales_weekly %>%
group_by(id) %>%
filter(group_indices() %in% 1:2) %>%
plot_seasonal_diagnostics(Date,
Weekly_Sales,
.feature_set = "week")walmart_sales_weekly %>%
group_by(id) %>%
filter(group_indices() %in% 1:2) %>%
plot_seasonal_diagnostics(Date,
Weekly_Sales,
.feature_set = "month.lbl")walmart_sales_weekly %>%
group_by(id) %>%
filter(group_indices() %in% 1:2) %>%
plot_seasonal_diagnostics(Date,
Weekly_Sales,
.feature_set = "hour")walmart_sales_weekly %>%
group_by(id) %>%
filter(group_indices() %in% 1) %>%
plot_seasonal_diagnostics(Date,
Weekly_Sales,
.feature_set = "hour")bike_sharing_daily %>%
plot_seasonal_diagnostics(dteday,
cnt,
.feature_set = "hour")Looks like we don’t have hour wise data in our data frame
library(workflows)
library(parsnip)
library(tidyquant)bikes_tbl <- bike_sharing_daily %>%
select(dteday, cnt) %>%
rename(date = dteday,
value = cnt)
str(bikes_tbl)'data.frame': 731 obs. of 2 variables:
$ date : Date, format: "2011-01-01" "2011-01-02" ...
$ value: int 985 801 1349 1562 1600 1606 1510 959 822 1321 ...
understanding splitting of data visually
bikes_tbl %>%
ggplot(aes(x = date, y = value)) +
geom_rect(xmin = as.numeric(ymd("2012-07-01")),
xmax = as.numeric(ymd("2013-01-01")),
ymin = 0, ymax = 10000,
fill = palette_light()[[4]], alpha = 0.01
) +
annotate("text", x = ymd("2011-10-01"), y = 7800,
color = palette_light()[[1]], label = "Train Region") +
annotate("text", x = ymd("2012-10-01"), y = 1550,
color = palette_light()[[1]], label = "Test Region") +
geom_point(alpha = 0.5, color = palette_light()[[1]]) +
labs(title = "Bikes sharing dataset") +
theme_tq()train_tbl <- bikes_tbl %>% filter(date < ymd("2012-07-01"))
test_tbl <- bikes_tbl %>% filter(date >= ymd("2012-07-01"))dim(train_tbl)[1] 547 2
dim(test_tbl)[1] 184 2
recipe_spec_ts <- recipe(value ~ .,
data = train_tbl) %>%
step_timeseries_signature(date)
recipe_spec_tsData Recipe
Inputs:
Operations:
Timeseries signature features from date
baked <- bake(prep(recipe_spec_ts), new_data = train_tbl)
head(baked)str(baked)tibble [547 x 29] (S3: tbl_df/tbl/data.frame)
$ date : Date[1:547], format: "2011-01-01" "2011-01-02" ...
$ value : int [1:547] 985 801 1349 1562 1600 1606 1510 959 822 1321 ...
$ date_index.num: int [1:547] 1293840000 1293926400 1294012800 1294099200 1294185600 1294272000 1294358400 1294444800 1294531200 1294617600 ...
$ date_year : int [1:547] 2011 2011 2011 2011 2011 2011 2011 2011 2011 2011 ...
$ date_year.iso : int [1:547] 2010 2010 2011 2011 2011 2011 2011 2011 2011 2011 ...
$ date_half : int [1:547] 1 1 1 1 1 1 1 1 1 1 ...
$ date_quarter : int [1:547] 1 1 1 1 1 1 1 1 1 1 ...
$ date_month : int [1:547] 1 1 1 1 1 1 1 1 1 1 ...
$ date_month.xts: int [1:547] 0 0 0 0 0 0 0 0 0 0 ...
$ date_month.lbl: Ord.factor w/ 12 levels "January"<"February"<..: 1 1 1 1 1 1 1 1 1 1 ...
$ date_day : int [1:547] 1 2 3 4 5 6 7 8 9 10 ...
$ date_hour : int [1:547] 0 0 0 0 0 0 0 0 0 0 ...
$ date_minute : int [1:547] 0 0 0 0 0 0 0 0 0 0 ...
$ date_second : int [1:547] 0 0 0 0 0 0 0 0 0 0 ...
$ date_hour12 : int [1:547] 0 0 0 0 0 0 0 0 0 0 ...
$ date_am.pm : int [1:547] 1 1 1 1 1 1 1 1 1 1 ...
$ date_wday : int [1:547] 7 1 2 3 4 5 6 7 1 2 ...
$ date_wday.xts : int [1:547] 6 0 1 2 3 4 5 6 0 1 ...
$ date_wday.lbl : Ord.factor w/ 7 levels "Sunday"<"Monday"<..: 7 1 2 3 4 5 6 7 1 2 ...
$ date_mday : int [1:547] 1 2 3 4 5 6 7 8 9 10 ...
$ date_qday : int [1:547] 1 2 3 4 5 6 7 8 9 10 ...
$ date_yday : int [1:547] 1 2 3 4 5 6 7 8 9 10 ...
$ date_mweek : int [1:547] 1 2 2 2 2 2 2 2 3 3 ...
$ date_week : int [1:547] 1 1 1 1 1 1 1 2 2 2 ...
$ date_week.iso : int [1:547] 52 52 1 1 1 1 1 1 1 2 ...
$ date_week2 : int [1:547] 1 1 1 1 1 1 1 0 0 0 ...
$ date_week3 : int [1:547] 1 1 1 1 1 1 1 2 2 2 ...
$ date_week4 : int [1:547] 1 1 1 1 1 1 1 2 2 2 ...
$ date_mday7 : int [1:547] 1 1 1 1 1 1 2 2 2 2 ...
recipe_spec_final <- recipe_spec_ts %>%
#step_rm(date) # keeping this commented as it creates problem in use some algorithm
step_rm(contains("iso"),
contains("minute"),
contains("hour"),
contains("am.pm"),
contains("xts")
) %>%
step_normalize(contains("index.num"), date_year) %>%
step_dummy(contains("lbl"), one_hot = TRUE)
recipe_spec_finalData Recipe
Inputs:
Operations:
Timeseries signature features from date
Delete terms contains("iso"), contains("minute"), ...
Centering and scaling for contains("index.num"), date_year
Dummy variables from contains("lbl")
baked_final <- bake(prep(recipe_spec_final), new_data = train_tbl)
baked_final %>% head()str(baked_final)tibble [547 x 38] (S3: tbl_df/tbl/data.frame)
$ date : Date[1:547], format: "2011-01-01" "2011-01-02" ...
$ value : int [1:547] 985 801 1349 1562 1600 1606 1510 959 822 1321 ...
$ date_index.num : num [1:547] -1.73 -1.72 -1.71 -1.71 -1.7 ...
$ date_year : num [1:547] -0.705 -0.705 -0.705 -0.705 -0.705 ...
$ date_half : int [1:547] 1 1 1 1 1 1 1 1 1 1 ...
$ date_quarter : int [1:547] 1 1 1 1 1 1 1 1 1 1 ...
$ date_month : int [1:547] 1 1 1 1 1 1 1 1 1 1 ...
$ date_day : int [1:547] 1 2 3 4 5 6 7 8 9 10 ...
$ date_second : int [1:547] 0 0 0 0 0 0 0 0 0 0 ...
$ date_wday : int [1:547] 7 1 2 3 4 5 6 7 1 2 ...
$ date_mday : int [1:547] 1 2 3 4 5 6 7 8 9 10 ...
$ date_qday : int [1:547] 1 2 3 4 5 6 7 8 9 10 ...
$ date_yday : int [1:547] 1 2 3 4 5 6 7 8 9 10 ...
$ date_mweek : int [1:547] 1 2 2 2 2 2 2 2 3 3 ...
$ date_week : int [1:547] 1 1 1 1 1 1 1 2 2 2 ...
$ date_week2 : int [1:547] 1 1 1 1 1 1 1 0 0 0 ...
$ date_week3 : int [1:547] 1 1 1 1 1 1 1 2 2 2 ...
$ date_week4 : int [1:547] 1 1 1 1 1 1 1 2 2 2 ...
$ date_mday7 : int [1:547] 1 1 1 1 1 1 2 2 2 2 ...
$ date_month.lbl_01: num [1:547] 1 1 1 1 1 1 1 1 1 1 ...
..- attr(*, "assign")= int [1:12] 1 1 1 1 1 1 1 1 1 1 ...
..- attr(*, "contrasts")=List of 1
.. ..$ date_month.lbl: chr "contr.poly"
$ date_month.lbl_02: num [1:547] 0 0 0 0 0 0 0 0 0 0 ...
..- attr(*, "assign")= int [1:12] 1 1 1 1 1 1 1 1 1 1 ...
..- attr(*, "contrasts")=List of 1
.. ..$ date_month.lbl: chr "contr.poly"
$ date_month.lbl_03: num [1:547] 0 0 0 0 0 0 0 0 0 0 ...
..- attr(*, "assign")= int [1:12] 1 1 1 1 1 1 1 1 1 1 ...
..- attr(*, "contrasts")=List of 1
.. ..$ date_month.lbl: chr "contr.poly"
$ date_month.lbl_04: num [1:547] 0 0 0 0 0 0 0 0 0 0 ...
..- attr(*, "assign")= int [1:12] 1 1 1 1 1 1 1 1 1 1 ...
..- attr(*, "contrasts")=List of 1
.. ..$ date_month.lbl: chr "contr.poly"
$ date_month.lbl_05: num [1:547] 0 0 0 0 0 0 0 0 0 0 ...
..- attr(*, "assign")= int [1:12] 1 1 1 1 1 1 1 1 1 1 ...
..- attr(*, "contrasts")=List of 1
.. ..$ date_month.lbl: chr "contr.poly"
$ date_month.lbl_06: num [1:547] 0 0 0 0 0 0 0 0 0 0 ...
..- attr(*, "assign")= int [1:12] 1 1 1 1 1 1 1 1 1 1 ...
..- attr(*, "contrasts")=List of 1
.. ..$ date_month.lbl: chr "contr.poly"
$ date_month.lbl_07: num [1:547] 0 0 0 0 0 0 0 0 0 0 ...
..- attr(*, "assign")= int [1:12] 1 1 1 1 1 1 1 1 1 1 ...
..- attr(*, "contrasts")=List of 1
.. ..$ date_month.lbl: chr "contr.poly"
$ date_month.lbl_08: num [1:547] 0 0 0 0 0 0 0 0 0 0 ...
..- attr(*, "assign")= int [1:12] 1 1 1 1 1 1 1 1 1 1 ...
..- attr(*, "contrasts")=List of 1
.. ..$ date_month.lbl: chr "contr.poly"
$ date_month.lbl_09: num [1:547] 0 0 0 0 0 0 0 0 0 0 ...
..- attr(*, "assign")= int [1:12] 1 1 1 1 1 1 1 1 1 1 ...
..- attr(*, "contrasts")=List of 1
.. ..$ date_month.lbl: chr "contr.poly"
$ date_month.lbl_10: num [1:547] 0 0 0 0 0 0 0 0 0 0 ...
..- attr(*, "assign")= int [1:12] 1 1 1 1 1 1 1 1 1 1 ...
..- attr(*, "contrasts")=List of 1
.. ..$ date_month.lbl: chr "contr.poly"
$ date_month.lbl_11: num [1:547] 0 0 0 0 0 0 0 0 0 0 ...
..- attr(*, "assign")= int [1:12] 1 1 1 1 1 1 1 1 1 1 ...
..- attr(*, "contrasts")=List of 1
.. ..$ date_month.lbl: chr "contr.poly"
$ date_month.lbl_12: num [1:547] 0 0 0 0 0 0 0 0 0 0 ...
..- attr(*, "assign")= int [1:12] 1 1 1 1 1 1 1 1 1 1 ...
..- attr(*, "contrasts")=List of 1
.. ..$ date_month.lbl: chr "contr.poly"
$ date_wday.lbl_1 : num [1:547] 0 1 0 0 0 0 0 0 1 0 ...
..- attr(*, "assign")= int [1:7] 1 1 1 1 1 1 1
..- attr(*, "contrasts")=List of 1
.. ..$ date_wday.lbl: chr "contr.poly"
$ date_wday.lbl_2 : num [1:547] 0 0 1 0 0 0 0 0 0 1 ...
..- attr(*, "assign")= int [1:7] 1 1 1 1 1 1 1
..- attr(*, "contrasts")=List of 1
.. ..$ date_wday.lbl: chr "contr.poly"
$ date_wday.lbl_3 : num [1:547] 0 0 0 1 0 0 0 0 0 0 ...
..- attr(*, "assign")= int [1:7] 1 1 1 1 1 1 1
..- attr(*, "contrasts")=List of 1
.. ..$ date_wday.lbl: chr "contr.poly"
$ date_wday.lbl_4 : num [1:547] 0 0 0 0 1 0 0 0 0 0 ...
..- attr(*, "assign")= int [1:7] 1 1 1 1 1 1 1
..- attr(*, "contrasts")=List of 1
.. ..$ date_wday.lbl: chr "contr.poly"
$ date_wday.lbl_5 : num [1:547] 0 0 0 0 0 1 0 0 0 0 ...
..- attr(*, "assign")= int [1:7] 1 1 1 1 1 1 1
..- attr(*, "contrasts")=List of 1
.. ..$ date_wday.lbl: chr "contr.poly"
$ date_wday.lbl_6 : num [1:547] 0 0 0 0 0 0 1 0 0 0 ...
..- attr(*, "assign")= int [1:7] 1 1 1 1 1 1 1
..- attr(*, "contrasts")=List of 1
.. ..$ date_wday.lbl: chr "contr.poly"
$ date_wday.lbl_7 : num [1:547] 1 0 0 0 0 0 0 1 0 0 ...
..- attr(*, "assign")= int [1:7] 1 1 1 1 1 1 1
..- attr(*, "contrasts")=List of 1
.. ..$ date_wday.lbl: chr "contr.poly"
model_spec_glmnet <- linear_reg(mode = "regression") %>%
set_engine("lm")workflow_glmnet <- workflow() %>%
add_recipe(recipe_spec_final) %>%
add_model(model_spec_glmnet)
workflow_glmnet== Workflow ==========================================================================
Preprocessor: Recipe
Model: linear_reg()
-- Preprocessor ----------------------------------------------------------------------
4 Recipe Steps
* step_timeseries_signature()
* step_rm()
* step_normalize()
* step_dummy()
-- Model -----------------------------------------------------------------------------
Linear Regression Model Specification (regression)
Computational engine: lm
workflow_trained_glmnet <- workflow_glmnet %>%
fit(data = train_tbl)
workflow_trained_glmnet== Workflow [trained] ================================================================
Preprocessor: Recipe
Model: linear_reg()
-- Preprocessor ----------------------------------------------------------------------
4 Recipe Steps
* step_timeseries_signature()
* step_rm()
* step_normalize()
* step_dummy()
-- Model -----------------------------------------------------------------------------
Call:
stats::lm(formula = ..y ~ ., data = data)
Coefficients:
(Intercept) date date_index.num date_year
-7532592.347 495.618 NA -84513.676
date_half date_quarter date_month date_day
-1871.725 108808.572 -50395.422 -1579.073
date_second date_wday date_mday date_qday
NA 22.248 NA 1200.819
date_yday date_mweek date_week date_week2
NA -432.567 -227.352 59.342
date_week3 date_week4 date_mday7 date_month.lbl_01
23.963 -2.619 -143.249 -3401.120
date_month.lbl_02 date_month.lbl_03 date_month.lbl_04 date_month.lbl_05
-4153.950 -110.377 -661.662 596.505
date_month.lbl_06 date_month.lbl_07 date_month.lbl_08 date_month.lbl_09
NA 2756.234 1166.565 NA
date_month.lbl_10 date_month.lbl_11 date_month.lbl_12 date_wday.lbl_1
2015.666 NA NA 338.453
date_wday.lbl_2 date_wday.lbl_3 date_wday.lbl_4 date_wday.lbl_5
226.378 292.336 15.391 108.059
date_wday.lbl_6 date_wday.lbl_7
NA NA
prediction_glmnet_tbl <- workflow_trained_glmnet %>%
predict(test_tbl) %>%
bind_cols(test_tbl)
prediction_glmnet_tblbikes_tbl %>%
ggplot(aes(x = date, y = value)) +
geom_rect(xmin = as.numeric(ymd("2012-07-01")),
xmax = as.numeric(ymd("2013-01-01")),
ymin = 0, ymax = 10000,
fill = palette_light()[[4]], alpha = 0.01
) +
annotate("text", x = ymd("2011-10-01"), y = 7800,
color = palette_light()[[1]], label = "Train Region") +
annotate("text", x = ymd("2012-10-01"), y = 1550,
color = palette_light()[[1]], label = "Test Region") +
geom_point(aes(x = date, y = value),
alpha = 0.5, color = palette_light()[[1]]) +
#Add predictions
geom_point(aes(x = date, y = .pred), data = prediction_glmnet_tbl,
alpha = 0.5, color = palette_light()[[2]]) +
labs(title = "Bikes sharing dataset with predictions") +
theme_tq()prediction_glmnet_tbl %>%
metrics(value, .pred)prediction_glmnet_tbl %>%
ggplot(aes(x = date, y = value - .pred)) +
geom_hline(yintercept = 0, color = "red") +
geom_point(color = palette_light()[[1]], alpha = 0.5) +
geom_smooth() +
theme_tq() +
labs(title = "GLM Model residuals on test set") +
scale_y_continuous(limits = c(-5000, 5000))NAhead(idx)[1] "2011-01-01" "2011-01-02" "2011-01-03" "2011-01-04" "2011-01-05" "2011-01-06"
idx_future <- idx %>% tk_make_future_timeseries(length_out = 200)
head(idx_future)[1] "2013-01-01" "2013-01-02" "2013-01-03" "2013-01-04" "2013-01-05" "2013-01-06"
future_tbl <- tibble(date = idx_future)
future_tblfuture_predictions_tbl <- workflow_trained_glmnet %>%
fit(data = bikes_tbl) %>%
predict(future_tbl) %>%
bind_cols(future_tbl)
head(future_predictions_tbl)bikes_tbl %>%
ggplot(aes(x = date, y = value)) +
geom_rect(xmin = as.numeric(ymd("2012-07-01")),
xmax = as.numeric(ymd("2013-01-01")),
ymin = 0, ymax = 10000,
fill = palette_light()[[4]], alpha = 0.01
) +
geom_rect(xmin = as.numeric(ymd("2013-01-01")),
xmax = as.numeric(ymd("2013-07-01")),
ymin = 0, ymax = 10000,
fill = palette_light()[[5]], alpha = 0.01
) +
annotate("text", x = ymd("2011-10-01"), y = 7800,
color = palette_light()[[1]], label = "Train Region") +
annotate("text", x = ymd("2012-10-01"), y = 1550,
color = palette_light()[[1]], label = "Test Region") +
annotate("text", x = ymd("2013-04-01"), y = 1550,
color = palette_light()[[1]], label = "Forecast Region") +
geom_point(#aes(x = date, y = value),
alpha = 0.5, color = palette_light()[[1]]) +
#Add predictions
geom_point(aes(x = date, y = .pred), data = prediction_glmnet_tbl,
alpha = 0.5, color = palette_light()[[2]]) +
geom_point(aes(x = date, y = .pred), data = future_predictions_tbl,
alpha = 0.5, color = palette_light()[[2]]) +
geom_smooth(aes(x = date, y = .pred), data = future_predictions_tbl,
method = "loess") +
labs(title = "Bikes sharing dataset with predictions") +
theme_tq()future_predictions_tbl <- future_predictions_tbl %>%
mutate(lo.95 = .pred - 1.96 * test_resid_sd$stdev,
lo.80 = .pred - 1.28 * test_resid_sd$stdev,
hi.80 = .pred + 1.28 * test_resid_sd$stdev,
hi.95 = .pred + 1.96 * test_resid_sd$stdev
)
head(future_predictions_tbl)bikes_tbl %>%
ggplot(aes(x = date, y = value)) +
geom_point(alpha = 0.5, color = palette_light()[[1]]) +
geom_ribbon(aes(y = .pred, ymin = lo.95, ymax = hi.95),
data = future_predictions_tbl,
fill = "#050BFF", color = NA, size = 0) +
geom_ribbon(aes(y = .pred, ymin = lo.80, ymax = hi.80, fill = key),
data = future_predictions_tbl,
fill = "#596DD5", color = NA, size = 0, alpha = 0.8) +
geom_point(aes(x = date, y = .pred), data = future_predictions_tbl,
alpha = 0.5, color = palette_light()[[2]]) +
geom_smooth(aes(x = date, y = .pred), data = future_predictions_tbl,
method = "loess", color = "white") +
labs(title = "Bikes Shaing Dataset") +
theme_tq()